In this post, we use open data and R to look at the distribution of shootings in space and time in Baltimore.

The complete code is posted in the document but the snippets are hidden unless you click them. For example, clicking code to the bottom-right of this sentence reveals the R script you would use to download all the data on shootings.

library(tidyverse)
library(scales)
library(knitr)

bpd <- read_csv("https://raw.githubusercontent.com/peterphalen/ceasefire/master/BPD_Part_1_Victim_Based_Crime_Data.csv")

# subset to shootings or homicides with a firearm
bpd <- subset(bpd, Description == "SHOOTING" |
                (Description == "HOMICIDE" & Weapon == "FIREARM"))

bpd$CrimeDate <- as.Date(bpd$CrimeDate, format = "%m/%d/%Y")

Maps of shootings by neighborhood

You can tap neighborhoods to see exact numbers.

It’s important to remember that all heatmaps of data are misleading. The map of raw shooting counts will overestimate shootings in areas with many residents and understimate shootings in areas with fewer residents. On the other hand, the population-adjusted map will paint a misleading picture of areas with fewer residents. For example, you’ll notice that the population-adjusted rate of shootings in the “Fairfield Area” neighborhood, which has only 159 residents, is likely an extreme overestimate.

Raw counts

This map shows the raw count of murders in Baltimore since 2012 by neighborhood.

library(geojsonio)
library(leaflet)

bpd <- subset(bpd, !is.na(Neighborhood))

# count by neighobrhood
count <- bpd %>%
  group_by(Neighborhood) %>%
  summarise(total.count=n()) 

# get polygons to draw neighborhood maps
nbds <- geojsonio::geojson_read("/Users/peterphalen/Documents/ceasefire/Neighborhoods.geojson", what = "sp")

get_shooting_count <- function(neighborhood){
  nbd <- as.character(neighborhood)
  if(nbd %in% count$Neighborhood){
    count <- count[count$Neighborhood == nbd,]$total.count
    return(count)
  }
  if(!(nbd %in% count$Neighborhood)){
    return(0)
  }
}

nbds$count <- sapply(nbds$Name, get_shooting_count)

# draw legend
range.count <- range(nbds$count,na.rm=T)
labs <- c(0,50,100,150,200,250)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')), labs)

leaflet(nbds) %>% 
  addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
  addPolygons(stroke=T,
              weight=1,
              popup=paste0(nbds$Name,"<br/>Shootings: ",nbds$count),
              color=~pal.crime(count),
              fillOpacity=.5) %>%
  addLegend("bottomright",title="# of shootings (2012-present)",colors=~pal.crime(labs),labels=~labs)

Population-adjusted

This version adjusts by the population of each neighborhood. Be careful about the super bright red areas, because some of them have very few residents and so even a single shooting will make them look really dangerous even though they’re probably not. (You can tap neighborhoods to see the number of residents.)

#--------- population-adjusted --------------#
nbds$per1k <- nbds$count / nbds$Population * 1000
nbds$per1k <- round(nbds$per1k)
nbds$per1k <- ifelse(nbds$Population == 0, NA, nbds$per1k)
labs <- c(0,25,50,75)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')), 
                          labs,
                          na.color = "#b2b2b2")

countlabel <- paste0(nbds$Name,"<br/>",nbds$count," shootings among ",nbds$Population," residents")
nbds$countlabel <- ifelse(nbds$Population == 0, paste0(nbds$Name,":<br/>","No residents"), countlabel)

leaflet(nbds) %>% #draw population-adjusted map,
                  #areas with 0 residents are greyed 
                  #out but can still be clicked
  addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
  addPolygons(stroke=T,
              weight=1,
              popup=nbds$countlabel,
              color=~pal.crime(per1k),
              fillOpacity=.6) %>%
  addLegend("bottomright",title="Shootings per one</br>thousand residents</br>(2012-present)",colors=~pal.crime(labs),labels=~labs)

Districts

Neighborhoods in our dataset have been categorized by district. Here is a map of all the districts. The holes in the map are neighborhoods that had zero observed shootings between 2012 and 2019.

bpd <- bpd[complete.cases(bpd$District, bpd$Neighborhood),]
# 23 neighborhoods have been coded in two different districts, presumably on accident
# we just pick the first district for now
district.index <- bpd %>% group_by(Neighborhood) %>% summarise(District = unique(District)[1])

get_district <- function(neighborhood){
  nbd <- as.character(neighborhood)
  if (nbd %in% district.index$Neighborhood){
  dist <- district.index[which(district.index$Neighborhood == nbd),]$District
  return(dist)
  }else{
  return(NA)
  }
}

nbds$district <- sapply(nbds$Name, get_district)

gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

labs <- unique(bpd$District)
pal.districts <- colorFactor(gg_color_hue(length(labs)), 
                             labs,
                             na.color="#00000000")

leaflet(nbds) %>% 
  addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
  addPolygons(stroke=T,
              weight=1,
              color=~pal.districts(district),
              popup=ifelse(!is.na(nbds$district), paste0(nbds$Name," (",nbds$district,")"),"No shootings recorded in this area"),
              fillOpacity=.5) %>%
  addLegend("bottomright",title="Districts",colors=~pal.districts(labs),labels=~labs)